The fMRI time series are taken from https://paris-saclay-cds.github.io/autism_challenge/. You have to follow their instructions on how to retrieve the relevant fMRI data for the MSDL atlas.
Check out their GitHub repository at: https://github.com/ramp-kits/autism/.
The following code is heavily based on the code provided by the competition's organizers.
In [1]:
import numpy as np
import pandas as pd
import os
In [2]:
# Root directory of the downloaded challenge data; the code below reads the
# CSV tables from its 'data' sub-directory and resolves the per-subject fMRI
# file names against it.
autism_data = '/home/makism/Temp/autism/'
In [3]:
def parse_dataset(data_dir=None):
    """Load the training subjects' feature table and ASD labels.

    Reads ``train.csv`` (subject ids, no header row), ``participants.csv``,
    ``anatomy.csv``, ``fmri_filename.csv``, ``anatomy_qc.csv`` and
    ``fmri_qc.csv`` from ``<data_dir>/data``. All tables except ``train.csv``
    use their first column as index and are concatenated column-wise on that
    index; the result is then restricted to the subjects listed in
    ``train.csv``, in the order they appear there.

    Parameters
    ----------
    data_dir : str or path-like, optional
        Directory that contains the ``data`` folder with the CSV files.
        When omitted, the module-level ``autism_data`` is used.

    Returns
    -------
    X : pandas.DataFrame
        One row per training subject. Columns coming from the participants,
        anatomy and fMRI-filename tables carry the prefixes
        ``participants_``, ``anatomy_`` and ``fmri_``; the ``select`` column
        of each QC table is renamed to ``anatomy_select`` / ``fmri_select``.
        The ``participants_asd`` column is removed.
    y : numpy.ndarray
        Values of the ``participants_asd`` column for the same rows.
    """
    if data_dir is None:
        # Resolved at call time so the default follows the notebook-level setting.
        data_dir = autism_data
    csv_dir = os.path.join(data_dir, 'data')

    def _read_indexed(filename):
        # Every per-subject table is indexed by its first column.
        return pd.read_csv(os.path.join(csv_dir, filename), index_col=0)

    subject_id = pd.read_csv(os.path.join(csv_dir, 'train.csv'), header=None)

    # read the list of the subjects
    df_participants = _read_indexed('participants.csv')
    df_participants.columns = ['participants_' + col for col in df_participants.columns]

    # load the structural and functional MRI data
    df_anatomy = _read_indexed('anatomy.csv')
    df_anatomy.columns = ['anatomy_' + col for col in df_anatomy.columns]
    df_fmri = _read_indexed('fmri_filename.csv')
    df_fmri.columns = ['fmri_' + col for col in df_fmri.columns]

    # load the QC for structural and functional MRI data and
    # rename their 'select' columns so the two stay distinguishable
    df_anatomy_qc = _read_indexed('anatomy_qc.csv').rename(columns={"select": "anatomy_select"})
    df_fmri_qc = _read_indexed('fmri_qc.csv').rename(columns={"select": "fmri_select"})

    X = pd.concat([df_participants, df_anatomy, df_anatomy_qc, df_fmri, df_fmri_qc], axis=1)
    # keep only the training subjects, in the order given by train.csv
    X = X.loc[subject_id[0]]

    # Only the raw label values are returned, so the Series needs no renaming
    # (Series.rename does not accept a ``columns`` keyword anyway).
    y = X['participants_asd']
    X = X.drop('participants_asd', axis=1)
    return X, y.values
In [4]:
# data: per-subject feature table (DataFrame); labels: array of the
# 'participants_asd' values for the same subjects.
data, labels = parse_dataset()
In [5]:
# Restrict the feature table to the columns whose name begins with 'fmri'.
fmri_columns = [name for name in data.columns if name.startswith('fmri')]
fmri_data = data[fmri_columns]
In [6]:
# Per-subject file names of the MSDL time series; they are joined with
# autism_data when the files are read.
fmri_msdl_filenames = fmri_data['fmri_msdl']
In [7]:
# Read every subject's time-series CSV (no header row) as a 2-D array and
# collect the per-subject arrays into one NumPy array.
# os.path.join is used instead of string concatenation so a trailing slash in
# autism_data does not produce a doubled separator.
# NOTE(review): np.array needs all per-subject arrays to share one shape to
# build a regular array; recent NumPy versions raise on ragged input — confirm
# that every file has the same number of rows.
fmri = np.array([
    pd.read_csv(os.path.join(autism_data, subject_filename), header=None).values
    for subject_filename in fmri_msdl_filenames
])
In [8]:
# Take the columns whose name begins with 'anatomy', then discard the
# 'anatomy_select' QC column so only the anatomical features remain.
anatomy_columns = [name for name in data.columns if name.startswith('anatomy')]
anatomy = data[anatomy_columns].drop(columns='anatomy_select')
In [9]:
# Persist the three prepared objects as .npy files under the relative
# 'data/' directory (np.save converts array-like input to an ndarray).
for out_path, payload in (
    ('data/autism_fmri_ts.npy', fmri),
    ('data/autism_anatomy.npy', anatomy),
    ('data/autism_labels.npy', labels),
):
    np.save(out_path, payload)